* Positional arguments supplied by the caller, in this order:
*   mintokens, maxtokens - joined as mintokens_maxtokens inside the token-info file name
*   where       - subfolder of the project folder that holds the token CSV and receives the PDFs
*   start_year  - component of the token-info file name
*   weirdo      - component of the token-info file name
*   path        - prefix placed in front of the Dropbox project folder
*   create_info - when equal to 1, the article-info CSV is imported, cleaned, saved and plotted
args  mintokens maxtokens where start_year weirdo path create_info

/*
local start_year 0
local T 15
local mintokens 1
local maxtokens 3
local cutter 200
local where superall
local create_info 1
local year 2020
local month 3
local weirdo 0
*/



* Extend the caller-supplied prefix to the project folder; every file below is read from or written under it.
local path "`path'Dropbox/text analysis sandbox/programs/JEEA/"

* Start from an empty session before changing settings.
* NOTE(review): maxvar can presumably only be changed while no data are in memory, so keep clear all first.
clear all
* NOTE(review): set mem is obsolete from Stata 12 onwards (reported as ignored) - confirm the target version before removing.
set mem 2000m
set maxvar 30000

if `create_info' == 1 {
	* Load the article-level metadata from its comma-separated export.
	insheet using "`path'data_info_all_svi.csv", comma

	* Discard rows whose article id is missing.
	drop if article_id == .

	* date is treated as a yyyymmdd number: split it into year, month and calendar quarter.
	gen year = floor(date / 10000)
	gen month = mod(floor(date / 100), 100)
	gen quarter = ceil(month / 3)
	drop date

	* Strip a trailing " new" tag from country names.
	replace country = substr(country, 1, length(country) - 4) if " new" == substr(country, -4, .)

	

	
	* Harmonise country spellings.
	* Every rule maps a raw spelling directly to its final name, and no final name
	* is itself the source of another rule, so the rules can run in any order.
	* The net mapping is the same as the earlier multi-stage list, which renamed
	* some correct names to misspellings and back again and repeated many rules.
	* NOTE(review): "Guninea-bissau" is kept as a source because the earlier list
	* matched that exact spelling; "Guinea-bissau" is covered as well.
	replace country = "Antigua and Barbuda" if country == "Antigua"
	replace country = "Brunei Darussalam" if country == "Brunei"
	replace country = "Burkina Faso" if country == "Burkina faso"
	replace country = "Cameroon" if country == "Camerun"
	replace country = "Cape Verde" if country == "Cape verde"
	replace country = "Congo" if country == "Congo Republic"
	replace country = "Congo (Democratic Republic of the)" if inlist(country, "Congo Democratic Republic", "Congo Democratic")
	replace country = "Costa Rica" if country == "Costa rica"
	replace country = "Cote d'Ivoire" if inlist(country, "Cote d'ivoire", "Ivory Coast")
	replace country = "Czech Republic" if country == "Czech"
	replace country = "Dominican Republic" if country == "Dominica Republic"
	replace country = "East Timor" if country == "East timor"
	replace country = "El Salvador" if inlist(country, "El savador", "El Savador")
	replace country = "Guinea-Bissau" if inlist(country, "Guninea-bissau", "Guinea Bissau", "Guinea-bissau")
	replace country = "Hong Kong" if country == "Hongkong"
	replace country = "Kazakstan" if country == "Kazakhstan"
	replace country = "Korea" if inlist(country, "South korea", "South Korea")
	replace country = "Lao People's Democratic Republic" if country == "Laos"
	replace country = "Libyan Arab Jamahiriya" if country == "Libya"
	replace country = "Macau (Aomen)" if country == "Macau"
	replace country = "Malaysia" if country == "Malasya"
	replace country = "Marshall Islands" if country == "Marshall Island"
	replace country = "Netherlands" if country == "Netherland"
	replace country = "New Zealand" if country == "New zealand"
	replace country = "Philippines" if country == "Philipines"
	replace country = "Puerto Rico" if country == "Puerto rico"
	replace country = "Russian Federation" if country == "Russia"
	replace country = "Saint Kitts and Nevis" if country == "Saint Kitts-Nevis"
	replace country = "Saint Vincent and the Grenadines" if country == "Saint Vincent and Grenadines"
	replace country = "Sao Tome and Principe" if inlist(country, "Saint tome", "Saint tome abd principe")
	replace country = "Saudi Arabia" if country == "Saudi arabia"
	replace country = "Serbia and Montenegro" if inlist(country, "Serbia and Montengro", "Serbia")
	replace country = "Singapore" if country == "Singapure"
	replace country = "Solomon Islands" if country == "Solomon Island"
	replace country = "South Sudan" if country == "South sudan"
	replace country = "Syrian Arab Republic" if country == "Syria"
	replace country = "Tanzania, United Rep. of" if country == "Tanzania"
	replace country = "Trinidad and Tobago" if country == "Trinidad and tobago"
	replace country = "United Arab Emirates" if country == "United Arab Emirate"
	replace country = "United Kingdom" if country == "United Kingdom old"
	replace country = "United States of America" if country == "United States"
	replace country = "Viet Nam" if country == "Vietnam"
	
	
	
	
	
	* Store the cleaned article info on disk, sorted by article id.
	sort article_id
	save "`path'data_info_all_svi", replace

	* (original note: add stacked bar graphs)

	* Descriptive figures: each row counts as one article and is tagged with its calendar quarter.
	gen obs = 1
	gen timeq = yq(year, quarter)
	format %tq timeq

	* Total number of articles for each paper.
	graph bar (sum) obs, over(paper) ytitle("") legend(off) ///
		graphregion(fcolor(white)) plotregion(fcolor(white))
	graph export "`path'`where'/articles_per_paper.pdf", replace
	
	/*
	graph bar (sum) obs, by(paper) over(country, label(angle(90) labsize(tiny))) ///
	plotregion(fcolor(white)) graphregion(fcolor(white))  legend(off) ytitle("")
	graph export "`path'`where'/articles_per_country_paper.pdf" , replace 
	
	
	graph bar (sum) obs, over(country, label(angle(90) labsize(tiny)))  ///
	plotregion(fcolor(white)) graphregion(fcolor(white))  legend(off) ytitle("")
	graph export "`path'`where'/articles_per_country.pdf" , replace 
	*/
	
	* Quarterly article counts for each paper.
	collapse (sum) obs, by(paper timeq)

	* Draw one time-series panel per paper. Panel names are collected while the
	* panels are created, so the combined figure contains every paper regardless
	* of how many distinct papers the data hold.
	graph drop _all
	levelsof paper, local(levels)
	local x = 1
	local panels
	foreach l of local levels {
		twoway (line obs timeq if paper == "`l'"), ///
			plotregion(fcolor(white)) graphregion(fcolor(white)) legend(off) ytitle("") ///
			name(fig`x') subtitle("`l'")
		local panels `panels' fig`x'
		local x = `x' + 1
	}
	graph combine `panels', plotregion(fcolor(white)) graphregion(fcolor(white))
	graph export "`path'`where'/articles_overtime_per_paper.pdf" , replace

	* Quarterly totals over all papers.
	collapse (sum) obs, by(timeq)

	twoway (line obs timeq), ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) legend(off) ytitle("")
	graph export "`path'`where'/articles_overtime.pdf" , replace
	
	* Stacked bars: articles per country split by paper, for the 100 countries with most articles.
	use "`path'data_info_all_svi", clear

	gen obs = 1
	gen timeq = yq(year, quarter)
	format %tq timeq

	* One counting column per paper, so that the collapse below yields per-paper totals.
	levelsof paper, local(levels)
	foreach l of local levels {
		gen obs_`l' = obs if paper == "`l'"
	}

	collapse (sum) obs obs_*, by(country)

	* Rank countries by total article count and keep the first 100.
	gsort -obs
	drop if _n > 100

	graph bar (sum) obs_*, stack over(country, label(angle(90) labsize(tiny))) ///
		graphregion(fcolor(white)) plotregion(fcolor(white))
	graph export "`path'`where'/articles_per_country_stacked.pdf", replace
	
	* Articles per country, one panel per paper (30 countries with most articles in each).
	clear all
	* Panel names fig1, fig2, ... are reused below, so make sure no earlier graphs remain.
	graph drop _all
	use "`path'data_info_all_svi"
	levelsof paper, local(levels)

	* Panel names are collected while the panels are created, so the combined
	* figure contains every paper regardless of how many there are.
	local x = 1
	local panels
	foreach l of local levels {
		* Reload the full article info and reduce it to this paper.
		use "`path'data_info_all_svi", clear
		gen obs = 1
		keep if paper == "`l'"
		collapse (sum) obs, by(country)

		* Rank by article count; id is the rank and carries the country name as value label.
		gsort -obs
		keep if _n <= 30
		gen id = _n
		labmask id, values(country)
		twoway (bar obs id), xlabel(1(1)30, valuelabel angle(vertical) labsize(vsmall)) ///
			subtitle("`l'") ///
			plotregion(fcolor(white)) graphregion(fcolor(white)) xtitle("") ytitle("") name(fig`x')
		local panels `panels' fig`x'
		local x = `x' + 1
	}

	graph combine `panels', plotregion(fcolor(white)) graphregion(fcolor(white))
	graph export "`path'`where'/articles_per_paper_per_country.pdf" , replace
	
	
	* Overall ranking: the 100 countries with the most articles.
	use "`path'data_info_all_svi", clear

	gen obs = 1
	gen timeq = yq(year, quarter)
	format %tq timeq

	collapse (sum) obs, by(country)

	* id is the rank by article count and carries the country name as value label.
	gsort -obs
	drop if _n > 100
	gen id = _n
	labmask id, values(country)

	twoway (bar obs id), ytitle("") xtitle("") ///
		xlabel(1(1)100, valuelabel angle(vertical) labsize(tiny)) ///
		graphregion(fcolor(white)) plotregion(fcolor(white))
	graph export "`path'`where'/articles_per_country.pdf", replace
	
	*clear
	*use `path'data_info_all_svi
	
}



* Token-range tag (min_max) that appears in the token-info file names.
local toke_int `mintokens'_`maxtokens'

* Convert the token-count CSV into a Stata dataset sorted by article id.
clear all
insheet using "`path'`where'/token_info_svi_`toke_int'_`start_year'_`weirdo'.csv", comma
* NOTE(review): the first row and the v1 column are dropped - presumably a leftover
* header row and an index column from the export; confirm against the CSV writer.
drop if _n == 1
drop v1
destring token_count, gen(tokens)
drop token_count

* Histogram of log token counts, exported as a PDF; the helper variable is not kept.
gen ach = log(tokens)
histogram ach
graph export "`path'`where'/token_distrib.pdf" , replace
drop ach

sort article_id
save "`path'`where'/token_info_svi_`toke_int'_`start_year'_`weirdo'", replace

* Remove the source CSVs now that the .dta versions exist.
* The article-info CSV is only guaranteed to be present when it was imported in
* this run; an earlier run may already have deleted it, so check before erasing
* instead of letting a missing file abort the script.
capture confirm file "`path'data_info_all_svi.csv"
if _rc == 0 {
	erase "`path'data_info_all_svi.csv"
}
erase "`path'`where'/token_info_svi_`toke_int'_`start_year'_`weirdo'.csv"
